Libraries

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(usmap)
library(maps)
library(ggplot2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.2.1 --
## v tibble  2.1.3     v purrr   0.3.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x purrr::map()    masks maps::map()
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(viridis)
## Loading required package: viridisLite
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.4-8, (SVN revision 845)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
##  Path to GDAL shared files: C:/Users/16196/OneDrive/Documents/R/win-library/3.6/rgdal/gdal
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: C:/Users/16196/OneDrive/Documents/R/win-library/3.6/rgdal/proj
##  Linking to sp version: 1.4-1
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
require(janitor)
## Loading required package: janitor
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
require(reshape2)
## Loading required package: reshape2
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
require(tidyr)
require(rstan)
## Loading required package: rstan
## Loading required package: StanHeaders
## rstan (Version 2.19.2, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For improved execution time, we recommend calling
## Sys.setenv(LOCAL_CPPFLAGS = '-march=native')
## although this causes Stan to throw an error on a few processors.
## 
## Attaching package: 'rstan'
## The following object is masked from 'package:tidyr':
## 
##     extract
require(rstanarm)
## Loading required package: rstanarm
## Loading required package: Rcpp
## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo
## rstanarm (Version 2.19.2, packaged: 2019-10-01 20:20:33 UTC)
## - Do not expect the default priors to remain the same in future rstanarm versions.
## Thus, R scripts should specify priors explicitly, even if they are just the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores())
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting
## 
## Attaching package: 'rstanarm'
## The following object is masked from 'package:rstan':
## 
##     loo
require(bayesplot)
## Loading required package: bayesplot
## This is bayesplot version 1.7.1
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting



Data

Finaldata<- read.csv("time_series_final_Data.csv")
google<-read.csv("Google/googlefinal.csv")
names(google)
## [1] "X.1"                "X"                  "Day"               
## [4] "State"              "ChinaVirusInterest" "KungFluInterest"   
## [7] "Region"

Re-Cleaned

google <- google %>%  select(-c(X.1))
Finaldata <- Finaldata %>% select(-c(X)) 
Finaldata <- Finaldata %>% mutate(Day= as.Date(Day))

Dataset Description

Demographic: Our dataset, Demographic, was created by merging three other datasets which all contained different demographic and election information. We obtained the main portion of our demographic data from the US Census Bureau’s American Community Survey (ACS) which is an ongoing survey administered by the U.S. Census Bureau. It gathers information on income, employment, housing characteristics, etc, annually for all the 50 U.S. States on the county and state level. To access the county-level dataset we used the R package called Choroplethr which provides API connections to data sources like the ACS. The ACS County-Level dataset was then merged with a county-level election outcome dataset that was created by Tony McGoven. Tony’s dataset contained presidential election results for 2008,2012, and 2016 but we chose to focus solely on the most recent election,2016. That said, the 2016 election results at the county-level were scraped from results published by Townhall.com. However, the State of Alaska reports results at the precinct or state level so there was no county-level data available. Therefore, another dataset had to be created that contained the election results for Alaska and this was done using the official election results provided by the Alaska Division of Elections and was later merged in. The final dataset that was used came from Alicia Johnson and it contained information on a state’s political leaning. Meaning it categorizes each county as belonging to a blue/red/purple state based on the state categorizations at 279towin.

COVID-19 Cases The COVID-19 data is provided by The COVID Tracking Project(CTP). All of the data points come from state/district/territory public health authorities—or, occasionally, from trusted news reporting, official press conferences, or (very occasionally) tweets or Facebook updates from state public health authorities or governors. These numbers are updated daily at 4PM EST. The biggest weakness of this dataset is that there is no standardized methods for states to follow for data collection/report. For example, some states, like Oregon, provide the full set of numbers but others provide some or none of these numbers on an ongoing basis. Some crucial states in this outbreak, notably California, Washington, and New York, have not been regularly reporting their total number of people tested. The CTP aims to remedy this uncertainty in states by utilizing other reporting/measuring tools such as: “Directly asking state officials, watching news conferences, gleaning information from trusted news sources, and whatever else it takes to present reliable numbers.”

Google Search Interest This data set includes two search interest indexes over time, measuring how people in each of the state’s interest in searching either “Kung Flu” or “China Virus” based on the time frame selected in the search. This data is downloaded directly from Google Trends which uses the same technique to track the interest of all searches on the platform. The main downside to this data set is the method of the indexing which makes the comparison from state to state less meaningful since each state is guaranteed to have a 100-level interest on their peak day, and the actual unknown search values can vary greatly across different states.

Dataset Details

dim(Finaldata)
## [1] 765  31
names(Finaldata)
##  [1] "Day"                    "State"                 
##  [3] "ChinaVirusInterest"     "KungFluInterest"       
##  [5] "positive"               "negative"              
##  [7] "death"                  "hospitalized"          
##  [9] "totalTestResults"       "FIPS"                  
## [11] "StayAtHome_date"        "Quarantine_Yes"        
## [13] "polyname"               "StateColor"            
## [15] "total_2016"             "dem_2016"              
## [17] "gop_2016"               "oth_2016"              
## [19] "total_population"       "percent_white"         
## [21] "percent_black"          "percent_asian"         
## [23] "percent_hispanic"       "per_capita_income"     
## [25] "median_rent"            "median_age"            
## [27] "percent_democrat2016"   "percent_republican2016"
## [29] "percent_other2016"      "Winner"                
## [31] "Region"
head(Finaldata)
##          Day State ChinaVirusInterest KungFluInterest positive negative
## 1 2020-03-10    AK                 27               0        0       23
## 2 2020-03-11    AK                  0               0        0       46
## 3 2020-03-12    AK                 26               0        0       46
## 4 2020-03-13    AK                 28               0        1       59
## 5 2020-03-14    AK                 30               0        1      143
## 6 2020-03-15    AK                 61               0        1      143
##   death hospitalized totalTestResults FIPS StayAtHome_date Quarantine_Yes
## 1    NA           NA               23    2      03/28/2020              1
## 2    NA           NA               46    2      03/28/2020              1
## 3    NA           NA               46    2      03/28/2020              1
## 4    NA           NA               60    2      03/28/2020              1
## 5    NA           NA              144    2      03/28/2020              1
## 6    NA           NA              144    2      03/28/2020              1
##   polyname StateColor total_2016 dem_2016 gop_2016 oth_2016
## 1   alaska        red     318608   116454   163387    38767
## 2   alaska        red     318608   116454   163387    38767
## 3   alaska        red     318608   116454   163387    38767
## 4   alaska        red     318608   116454   163387    38767
## 5   alaska        red     318608   116454   163387    38767
## 6   alaska        red     318608   116454   163387    38767
##   total_population percent_white percent_black percent_asian
## 1           741456          0.65          0.04          0.07
## 2           741456          0.65          0.04          0.07
## 3           741456          0.65          0.04          0.07
## 4           741456          0.65          0.04          0.07
## 5           741456          0.65          0.04          0.07
## 6           741456          0.65          0.04          0.07
##   percent_hispanic per_capita_income median_rent median_age
## 1             0.07             34922          NA       34.7
## 2             0.07             34922          NA       34.7
## 3             0.07             34922          NA       34.7
## 4             0.07             34922          NA       34.7
## 5             0.07             34922          NA       34.7
## 6             0.07             34922          NA       34.7
##   percent_democrat2016 percent_republican2016 percent_other2016     Winner
## 1            0.3655087              0.5128151         0.1216762 Republican
## 2            0.3655087              0.5128151         0.1216762 Republican
## 3            0.3655087              0.5128151         0.1216762 Republican
## 4            0.3655087              0.5128151         0.1216762 Republican
## 5            0.3655087              0.5128151         0.1216762 Republican
## 6            0.3655087              0.5128151         0.1216762 Republican
##   Region
## 1   West
## 2   West
## 3   West
## 4   West
## 5   West
## 6   West
summary(Finaldata)
##       Day                 State     ChinaVirusInterest KungFluInterest 
##  Min.   :2020-03-10   AK     : 15   Min.   :  0.00     Min.   : 0.000  
##  1st Qu.:2020-03-13   AL     : 15   1st Qu.: 26.00     1st Qu.: 0.000  
##  Median :2020-03-17   AR     : 15   Median : 37.00     Median : 0.000  
##  Mean   :2020-03-17   AZ     : 15   Mean   : 37.24     Mean   : 3.903  
##  3rd Qu.:2020-03-21   CA     : 15   3rd Qu.: 49.00     3rd Qu.: 5.000  
##  Max.   :2020-03-24   CO     : 15   Max.   :100.00     Max.   :58.000  
##                       (Other):675                                      
##     positive          negative         death          hospitalized    
##  Min.   :    0.0   Min.   :    0   Min.   :  0.000   Min.   :   0.00  
##  1st Qu.:   10.0   1st Qu.:   94   1st Qu.:  1.000   1st Qu.:   3.75  
##  Median :   38.0   Median :  337   Median :  2.000   Median :  32.50  
##  Mean   :  322.2   Mean   : 1820   Mean   :  8.728   Mean   : 210.91  
##  3rd Qu.:  152.0   3rd Qu.: 1447   3rd Qu.:  5.000   3rd Qu.:  73.75  
##  Max.   :25665.0   Max.   :65605   Max.   :210.000   Max.   :3234.00  
##                    NA's   :41      NA's   :416       NA's   :709      
##  totalTestResults      FIPS         StayAtHome_date Quarantine_Yes
##  Min.   :    0    Min.   : 1.00   03/24/2020: 90    Min.   :0.00  
##  1st Qu.:  103    1st Qu.:16.00   03/28/2020: 75    1st Qu.:1.00  
##  Median :  380    Median :29.00   03/30/2020: 75    Median :1.00  
##  Mean   : 2045    Mean   :28.96   03/23/2020: 60    Mean   :0.88  
##  3rd Qu.: 1500    3rd Qu.:42.00   03/25/2020: 60    3rd Qu.:1.00  
##  Max.   :91270    Max.   :56.00   (Other)   :300    Max.   :1.00  
##                                   NA's      :105    NA's   :15    
##        polyname    StateColor    total_2016         dem_2016      
##  alabama   : 15   blue  :285   Min.   : 248742   Min.   :  55949  
##  alaska    : 15   purple:165   1st Qu.: 730628   1st Qu.: 266827  
##  arizona   : 15   red   :315   Median :1922218   Median : 779535  
##  arkansas  : 15                Mean   :2489256   Mean   :1188393  
##  california: 15                3rd Qu.:3208899   3rd Qu.:1534487  
##  colorado  : 15                Max.   :9631972   Max.   :5931283  
##  (Other)   :675                                                   
##     gop_2016          oth_2016      total_population   percent_white   
##  Min.   :  11553   Min.   :  8496   Min.   :  570134   Min.   :0.2300  
##  1st Qu.: 345598   1st Qu.: 38767   1st Qu.: 1583364   1st Qu.:0.5900  
##  Median : 947934   Median : 91364   Median : 4361333   Median :0.7400  
##  Mean   :1179254   Mean   :121608   Mean   : 6108975   Mean   :0.7029  
##  3rd Qu.:1535513   3rd Qu.:183694   3rd Qu.: 6819579   3rd Qu.:0.8300  
##  Max.   :4681590   Max.   :515968   Max.   :37659181   Max.   :0.9400  
##                                                                        
##  percent_black    percent_asian     percent_hispanic per_capita_income
##  Min.   :0.0000   Min.   :0.01000   Min.   :0.0100   Min.   :20618    
##  1st Qu.:0.0300   1st Qu.:0.01000   1st Qu.:0.0400   1st Qu.:24635    
##  Median :0.0700   Median :0.02000   Median :0.0800   Median :26824    
##  Mean   :0.1084   Mean   :0.03765   Mean   :0.1082   Mean   :28098    
##  3rd Qu.:0.1500   3rd Qu.:0.04000   3rd Qu.:0.1300   3rd Qu.:30469    
##  Max.   :0.4900   Max.   :0.37000   Max.   :0.4700   Max.   :45290    
##                                                                       
##   median_rent       median_age    percent_democrat2016
##  Min.   : 448.0   Min.   :29.60   Min.   :0.2249      
##  1st Qu.: 564.0   1st Qu.:36.30   1st Qu.:0.3609      
##  Median : 658.0   Median :37.60   Median :0.4670      
##  Mean   : 714.3   Mean   :37.66   Mean   :0.4501      
##  3rd Qu.: 838.0   3rd Qu.:39.00   3rd Qu.:0.5335      
##  Max.   :1220.0   Max.   :43.20   Max.   :0.9285      
##  NA's   :15                                           
##  percent_republican2016 percent_other2016        Winner          Region   
##  Min.   :0.04122        Min.   :0.01937   Democrat  :315   Midwest  :195  
##  1st Qu.:0.41161        1st Qu.:0.04160   Republican:450   Mountain :120  
##  Median :0.49064        Median :0.05071                    Northeast:180  
##  Mean   :0.49010        Mean   :0.05976                    South    :195  
##  3rd Qu.:0.58095        3rd Qu.:0.06991                    West     : 75  
##  Max.   :0.70052        Max.   :0.25598                                   
## 
colnames(Finaldata)
##  [1] "Day"                    "State"                 
##  [3] "ChinaVirusInterest"     "KungFluInterest"       
##  [5] "positive"               "negative"              
##  [7] "death"                  "hospitalized"          
##  [9] "totalTestResults"       "FIPS"                  
## [11] "StayAtHome_date"        "Quarantine_Yes"        
## [13] "polyname"               "StateColor"            
## [15] "total_2016"             "dem_2016"              
## [17] "gop_2016"               "oth_2016"              
## [19] "total_population"       "percent_white"         
## [21] "percent_black"          "percent_asian"         
## [23] "percent_hispanic"       "per_capita_income"     
## [25] "median_rent"            "median_age"            
## [27] "percent_democrat2016"   "percent_republican2016"
## [29] "percent_other2016"      "Winner"                
## [31] "Region"

Variables of Interest

Variables: Description:

polyname

State Name

StateColor

Political Leaning

percent_hispanic

Percent of the Population that is Hispanic

percent_white

Percent of the Population that is White

percent_asian

Percent of the Population that is Asian

percent_black

Percent of Population that is Black

total_population

Total State Population

per_capita_income

Income per Capita

percent_democrat2016

Percent of votes won by Democrat (Clinton)

percent_republican2016

Percent of votes won by Republican (Trump)

Winner

Indicator for whether a Republican or Democrat Won

total_2016

Total Number of Votes

Positive

Number of reported positive COVID-19 cases

Negative

Number of reported negative COVID-19 cases

date

Date of report

death

Total Number of reported deaths due to COVID-19

hospitalized

Total Number of indivudals hopitalized due to COVID-19

totalTestResults

Total Number test results (Positive +Negative)

FIPS

A five-digit Federal Information Processing Standards code which uniquely identified counties and county

KungFluInterest

Interest index from google searches by state. Peak search day=100, all other days in set are based searches on relative to this peak day.

ChinaVirusInterest

Interest index from google searches by state. Peak search day=100, all other days in set are based searches on relative to this peak day.

Visualizations

library(plotly)
## Warning: package 'plotly' was built under R version 3.6.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggmap':
## 
##     wind
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
#google_ts<-read.csv("Time_series_Google.csv")

day1<-as.Date("03/10/2020",format = "%m/%d/%Y")
day2<-as.Date("03/24/2020",format = "%m/%d/%Y")

google_ts<-google%>%
  mutate(Day = as.Date(as.character(Day)))

a<-google_ts %>% 
  #filter(Day<=day2)%>%
  #filter(Day>=day1)%>%
  group_by(Region, Day) %>% 
  summarize(ChinaVirusSearch = median(ChinaVirusInterest)) %>% 
  ggplot(aes(x=Day, y=ChinaVirusSearch, color=Region))+
  geom_point()

ggplotly(a)

Demographic

#Demographic: Identification 
Finaldata <- data.frame(Finaldata) %>% mutate(state = State)
plot_usmap(data = Finaldata, values = "percent_white", color = "white") + 
  scale_fill_continuous(name = "Percent White", label = scales::comma) + 
  theme(legend.position = "right")+ ggtitle("Percent of Residents that Identify as White") + 
  theme(
plot.title = element_text(color="Black", size=14, face="bold")
)

plot_usmap(data = Finaldata, values = "percent_asian", color = "white") + 
  scale_fill_continuous(low = "sky blue", high = "black", name = "Percent Asian", label = scales::comma) + 
  theme(legend.position = "right") + ggtitle("Percent of Residents that Identify as Asian") +
  theme(
plot.title = element_text(color="Black", size=14, face="bold")
) 

#By Results 
plot_usmap(data = Finaldata, values = "percent_asian", color = "white") + 
  scale_fill_continuous(low = "sky blue", high = "black", name = "Percent Asian", label = scales::comma) + 
  theme(legend.position = "right") + ggtitle("Percent Asian by General Election Results")+
  theme(
plot.title = element_text(color="Black", size=14, face="bold")
) + facet_wrap(~Winner)

#By income 
plot_usmap(data = Finaldata, values = "per_capita_income", color = "white") + 
  scale_fill_continuous(low = "sky blue", high = "black", name = "$ Per Capita", label = scales::comma) + 
  theme(legend.position = "right") + ggtitle("Percent Capita Income")+
  theme(
plot.title = element_text(color="Black", size=14, face="bold")
) 

#Income by state affiliation 
ggplot(Finaldata, aes(x = per_capita_income, fill = StateColor)) + 
    geom_density(alpha = .8)+ ggtitle("Income per Capita") + xlab("$") + ylab("Density")

#Trump Support 
ggplot(Finaldata, aes(x = percent_republican2016)) + 
    geom_density()+ ggtitle("Trump Support During the 2016 Elections") + xlab("Percent of Trump Support") + ylab("Density")

ggplot(Finaldata, aes(x = percent_asian, y = percent_republican2016, color = Winner)) + 
    scale_color_manual(values = c("blue","purple","red")) + 
    geom_point(alpha = 0.8) + geom_text(aes(label=ifelse(percent_asian>.2,as.character(state),"")),hjust=1.2,vjust=0)+ geom_text(aes(label=ifelse(percent_republican2016<.2,as.character(state),"")),hjust=-.1,vjust=0) + ggtitle("Percent of Asians and Trump Support by Election Outcome") + xlab("Percent Asian") + ylab("Trump Support")

ggplot(Finaldata, aes(x = per_capita_income)) + 
    geom_density()+ ggtitle("Per Capita Income") + xlab("Per Capita Income") + ylab("Density") + facet_wrap(~Winner)

COVID-19 Cases

## Creating a df of just the cases the week before 3/17
weekbefore <- data.frame(Finaldata) %>% filter(Day <= as.Date("2020-03-17"))

plot1<- plot_usmap(data = weekbefore, values = "positive", color = "white") + 
  scale_fill_continuous(name = "Cases", label = scales::comma) + 
  theme(legend.position = "right")+ ggtitle("# of COVID Cases Before 3/17") + 
  theme(
plot.title = element_text(color="Black", size=8)
)

## Creating a df of just the cases the week after 3/17
weekafter <- data.frame(Finaldata) %>% filter(Day >= as.Date("2020-03-17"))

plot2<-plot_usmap(data = weekafter, values = "positive", color = "white") + 
  scale_fill_continuous(name = "Cases", label = scales::comma) + 
  theme(legend.position = "right")+ ggtitle("# of COVID Cases After 3/17") + 
  theme(
plot.title = element_text(color="Black", size=5)
)


# The number of Positive Covid Cases, comparison before and after the China Virus announcement 
grid.arrange(plot1, plot2, ncol=2)

# Positive COVID cases aggregated by party 
ggplot(Finaldata, aes(x = positive, fill = StateColor)) + 
    geom_density(alpha = .8)+ ggtitle("Positive COVID-19 Cases by State Party") + xlab("Cases") + ylab("Density")+xlim(0,500)
## Warning: Removed 81 rows containing non-finite values (stat_density).

# ## Positive COVID cases map before and after week 
# plot3<-statebins(state_data = weekbefore, state_col = "State",
#                      text_color = "white", value_col = "positive",
#                      brewer_pal="Spectral", font_size = 3,breaks =7,
#                      legend_title="Week Before",
#                  labels = c("0","1-5","5-10","10-20","20-30","30-40","50-100"))
# 
# ### Drastic scale change, needs work for comparison sake
# plot4<-statebins(state_data = weekafter, state_col = "state",
#                      text_color = "white", value_col = "positive",
#                      brewer_pal="Spectral", font_size = 3,breaks =6,
#                      legend_title="Week After",labels = c("1-100", "100-500", "500-5000", "5000-15000", "15000-25000", "25000-35000"))
# 
# plot3
# plot4

Final Dataset

ggplot(Finaldata, aes(x = ChinaVirusInterest, fill = as.factor(State))) + 
  geom_density(alpha = 0.5)

ggplot(Finaldata, aes(x = Day, y = ChinaVirusInterest, color = factor(State))) + 
    geom_point() + 
    geom_smooth(method = "lm", se = FALSE) + 
    facet_wrap(~ State) + 
    theme(legend.position = "none")



Analysis